
clear all
set mem 3g
set matsize 11000
set maxvar 32767 
set more off 
capture log close

/*************************************************************************************
Using the LFS data 1995-1999, this do file constructs:
the high_skill and low_skill percentages of graduates by SIC 1992 or SIC 2003 3-digit sector
*************************************************************************************/

global home "Y:\53523_Sorting"
global working "$home\Working"
global LFS "$home\Syntax\LFS_construct\LFS_1664"
global LFS_original "$home\Original_data\6727_LFS\stata\stata9"
global LFS_added "$working\Data\LFS\added_files"
global logs "$working\Logs\Coaggl"
global temp "$working\Coaggl"
global geodata "$working\Data\GeoData"
global output "$working\Data\Coaggl"


**********************************************************
*Check missings variable of interest

local i=95
while `i'<=99 {

local j=1
while `j'<=4 {

*Uses standard do files that in-sheet and keep variables from LFS files (various quarters and years)
*These are available for authorised users of UK Data Service Secure Lab.

quietly do $LFS\code`i'q`j'.do

disp "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
disp "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
display "This year is `i' & quarter is `j'"
disp "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
disp "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"

disp"keep employed, unemployed and inactive" 
keep if lfstat!=.
tab lfstat, m

disp"REFWKM"
tab refwkm, m

disp"URES"
tab ures, m
tab ures refwkm, m

disp"soc90"
tab soc90, m
tab soc90 refwkm, m
tab soc90 refwkm

disp"skill"
tab skill, m
tab skill refwkm, m
tab skill refwkm

disp"skgav"
tab skgav, m
tab skgav refwkm , m
tab skgav refwkm

disp"lfstat for those who skill is missing"
tab lfstat if skill==.

disp"full-time/part-time if l1==1"
tab ftptm if l1==1,m

display "*********************************"
display "Start cleaning data"
display "*********************************"

/*drop Northern Ireland and ures==.*/
disp"drop NI"
drop if ures==20
disp"drop if ures is missing"
drop if ures==.
rename ures region

rename self semp
rename soc1 soc_1d
rename soc90 soc_3d

display "**************************************************"
display " Further cleaning of variables"
display "**************************************************"

format remserno %16.0f 

tab skill, m
tab skgav, m
disp"replace nkid=0 if nkid==."
replace nkid=0 if nkid==.
disp"drop if skill is missing"
drop if skill==.

disp"keep only employees"
keep if l1==1
drop if semp==1

disp"drop if ward98=-9"
drop if ward98=="-9" 
disp"count if ward98=."
count if ward98==""
disp"count if ward98=-5"
count if ward98=="-5"

sort remserno quota persno hhold 
duplicates tag remserno quota persno hhold , gen(t)
tab t, m

rename weight1 weight
rename ward98 ward982

#delimit ;
keep remserno quota persno hhold year qr refwkm thisw qr
	sex age status skill skgav hoh nkid hhsize white
	student lfstat l1 l2 l3 ftptm pubw allemp soc_1d soc_3d inds92m indd92m
	indm92m rethed secj weight region ward982;
#delimi cr;


compress
disp "---------------------------------------"
save $temp\pnl`i'q`j'.dta, replace
disp "---------------------------------------"

local j=`j'+1
}
local i=`i'+1
display "*********************************"
}

/*appending the dataset*/
use $temp\pnl95q1.dta, clear
append using $temp\pnl95q2
append using $temp\pnl95q3
append using $temp\pnl95q4

local i=96
while `i'<=99 {
local j=1 
while `j'<=4 {
append using $temp\pnl`i'q`j'.dta
local j=`j'+1
}
local i=`i'+1
}
compress
save $temp\panel_allqr_untidy.dta, replace
sum
save, replace

*---------------*
clear all
set mem 3g
set matsize 11000
set maxvar 32767
set more off 
*---------------*

/***Using the untidy dataset***/
use "$temp\panel_allqr_untidy.dta", clear

/*adding sic92 3digit*/

*This is a standard do file that maps four digit sectors into three digit sectors
*A number of sectors have been aggregated; see manuscript for details 

do "Y:\53523_Sorting\Syntax\LFS_construct\sic92_4to3.do"
rename ind3 sic92_3d
drop ind4
rename inds92m sic92_1d

#delimit;
order remserno quota persno hhold year qr refwkm thisw qr
	sex age student rethed lfstat l* ftptm pubw allemp 
	soc_1d soc_3d sic92_1d sic92_3d region ward982;
	
#delimit cr;
duplicates tag remserno quota persno year qr, gen(t)
tab t
drop t
egen id=group(remserno quota persno year qr)
codebook id
order id

replace ward982=trim(ward982)
gen l=length(ward982) 
tab l
gen s4=ward982 if l==4
gen s6=ward982 if l==6
replace s4=trim(s4)
replace s6=trim(s6)
gen s1=0
gen s1s=string(s1)
gen s2s=string(s1)
drop s1
gen wardnew=s1s+s2s+s4 if l==4
replace wardnew=s6 if l==6
replace wardnew="-5" if l==2
rename wardnew ward98
sort ward98
compress
merge m:1 ward98 using "$geodata\ttwa_ward_mapping.dta" 

*Ward to TTWA mapping can be accessed by authorised users via the UK Data Service Secure Lab.
tab _merge
drop if _merge==2
	
rename _merge merge1


sort ward98
merge ward98 using "$geodata\ttwa_ward_stat05_mapping.dta"

*Further corrections to wards/TTWAs; ward to TTWA mapping can be accessed by authorised users via the UK Data Service Secure Lab.
tab _merge
drop if _merge==2

count if year==98
count if year==99
sum ttwa if year==98
sum ttwa if year==99
replace ttwa=ttwa_cen if year==98 & ward98!="-5" & ttwa==. & _merge==3
replace ttwa=ttwa_cen if year==99 & ward98!="-5" & ttwa==. & _merge==3

tab year qr if ward98==""
tab year qr if ward98=="-5"
tab year qr if ward98=="-9"
tab year refwkm if ward98=="-5"
tab year thiswv if ward98=="-5"
tab year qr if ttwa!=.

drop _merge merge1

drop if ttwa==.
*This is about 3.17% of the sample
drop  l s4 s6 s1s s2s flag mode_ttwa ttwa_cen

**************************************************
*Use the classification provided by Gibbons, Overman, Resende, SERC DP 65, 2011 to create "grouped" TTWA and rural/urban divide
rename ttwa TTWA
sort TTWA
merge m:m TTWA using "$geodata\ruralgroup_urban_ttwa_updated.dta"
tab _merge
drop if _merge==2
drop _merge
count if TTWA!=.

**************************
*codebook ttwa_2001 area_code
drop allemp weight thiswv refwkm secj rethed student hhold hoh age l1 l2 l3 ward982 ward98
gen new_area_name=ttwa_name if  area_name=="Urban"

replace new_area_name=area_name if  area_name!="Urban"
gen urban=1 if  area_name=="Urban"
replace urban=0 if  area_code!=. &  area_name!="Urban"
tab urban, m
count

***************************
drop TTWA
gen TTWA= area_code
codebook TTWA
sort TTWA
rename  ttwa_name TTWA_name

#delimit ;
label define ttwa_code_new
11	"Barnsley"  16	"Bedford"  20	"Birmingham"  22	"Blackburn"  23	"Blackpool"  24	"Bolton"  26	"Bournemouth"  27	"Bradford"
33	"Brighton"  34	"Bristol"  36	"Burnley"  40	"Calderdale"  41	"Cambridge"  44	"Cardiff"  48	"Chelmsford"
49	"Cheltenham"  54	"Colchester"  56	"Coventry"  59	"Crawley"  62	"Darlington"  63	"Derby"  66	"Doncaster"
70	"Dudley "  82	"Exeter"  90	"Gloucester"  94	"Grimsby"  95	"Guildford"  98	"Hartlepool" 
99	"Hastings"  107	"Huddersfield"  108	"Hull"  112	"Ipswich"  126	"Leeds"  127	"Leicester"   129	"Liverpool" 
139	"Luton"  141	"N Kent"  143	"Manchester"  144	"Mansfield"  148	"Middlesbrough"  150	"Milton Key."
157	"Newcastle"  158	"Newport"  163	"Northampton"  164	"Norwich"  165	"Nottingham"  171	"Oxford"
177	"Peterborough"  180	"Plymouth"  181	"Poole"  183	"Portsmouth"  184	"Preston"  186	"Reading"  189	"Rochdale"
195	"Sheffield"  201	"Southampton"  202	"Southend"  206	"Stevenage"  208	"Stoke-on-Trent"  211	"Sunderland" 
212	"Swansea"  213	"Swindon"  215	"Telford"  219 "Torbay"  222	"Tunbridge Wells"  225	"Wakefield"  226	"Walsall"
227	"Warrington"  233	"Wirral"  235	"Wolverhampton"  236	"Worcester"  239	"Worthing"
241	"Slough"  243	"York"  1015	"Carlisle"  1016	"Scot. Borders"  1017	"Ashington" 
1018	"Mid NE" 1019	"N Cumbria"  1020	"S Cumbria"  1021	"Kendal"  1022	"Lancaster"
1023	"NE Yorkshire"  1024	"Harrogate"  1025	"NW Yorkshire"  1026	"Scarborough" 
1027	"Chester"  1028	"Northwich"  1029	"Wrexham"  1030	"Shrewsbury"  1031	"Mid-Wales Bord."
1032	"Hereford"  1033	"South-Wales Bord."  1034	"Bridgend"  1035	"South-Mid Wales"  1036	"SW Wales"
1037	"Mid Wales"  1039	"NW Wales"  1040	"North Wales"  1041	"Stafford"  1042	"Burton"  1043	"W Peak Distr."
1044	"Chesterfield"   1045	"Worksop & Retford"  1046	"Scunthorpe"  1047 "E. Lincolnshire"   1048	"W Lincolnshire"  1049	"Norfolk"
1050	"N Norfolk"  1051	"Huntingdon"  1052	"Kettering"  1053	"Warwick"  1054	"Rugby"
1055	"Banbury"   1056	"W East Anglia"  1057	"East Anglia Coast" 
1058	"Harlow"  1059	"Chichester"  1060	"Eastbourne"  1061	"W Kent" 
1062	"Canterbury"  1063	"E Kent"  1064	"I. of Wight"  1065	"Basingstoke"  1066	"Newbury"
1067	"Andover"  1068	"Salisbury"  1069	"Trowbridge"  1070	"Bath"  1071	"E Somerset"
1072	"Yeovil"  1073	"Devon Coast"  1074	"S Devon"  1075	"Taunton"  1076	"N Devon"  1077	"NW Devon" 
1078	"W Cornwall"  1079	"E Cornwall"  
1 "Aberdeen" 73 "Dundee" 79 "Edinburgh" 89 "Glasgow" 123 "Lanarkshire" 1001 "N Scotland" 1002 "Moray Firth" 1003 "W Highlands"	
1004 "Inverness" 1005 "Stirling&All." 1006 "E Highlands"	1007 "Perth&Blairg." 1008 "N of Forth"
1009 "Dunfermline" 1010 "Falkirk"	1011 "Livingston&Bathg." 1012 "Greenock,Arran&Irv." 1013 "Ayr&Kilmarnock" 1014 "N Solway"
135  "London" ;

#delimit cr;
label values TTWA ttwa_code_new
tab TTWA, m
tab TTWA, nol

drop  area_name area_code new_area_name TTWA_name

order id remserno quota persno year qr TTWA urban sic92_3d sic92_1d soc_3d soc_1d

******************************** 
compress
save "$temp\panel_allqr_clean.dta", replace
********************************

use "$temp\panel_allqr_clean.dta", clear

*----focus on manufacturing
keep if sic92_3d>=151 & sic92_3d<=372
sum id soc_3d sic92_3d
keep if urban==1

rename soc_3d soc3d
rename sic92_3d sic3d

*---- Drop tobacco and aggregates some sectors; see manuscript for details
drop if sic3d==160
recode sic3d 183 182 181 = 182
recode sic3d 231 232 233 = 232
recode sic3d 247 246 = 246
recode sic3d 154 = 158
recode sic3d 265 = 266
recode sic3d 223 = 222

egen ind = group(sic3d)
sum ind
gen min_i=r(min)
gen max_i=r(max)

*----computing high_skill and low_skill percentages
sort ind
by ind: egen nempl_high_sic=count(id) if skill==1
by ind: egen nempl_sic=count(id) if skill<.
gen s_high = (nempl_high_sic/nempl_sic)
sum  nempl_high_sic nempl_sic s_high

collapse (mean) s_high nempl_high_sic nempl_sic min_i max_i sic3d sex, by(ind)
replace s_high=0 if s_high==.

xtile high_d = s_high, n(2)
recode high_d 1=0 2=1
label define high_dummy 0"low" 1"high"
label value high_d high_dummy 

gen sic_i=string(sic3d)
gen sic_j=string(sic3d)

save "$output\high_d.dta", replace

use "$output\high_d.dta", clear
rename high_d high_di 
rename sex sexi
keep s_high sic_i high_di sexi
sort sic_i
save "$output\high_di.dta", replace

use "$output\high_d.dta", clear
rename high_d high_dj
rename sex sexj
keep sic_j high_dj sexj
sort sic_j
save "$output\high_dj.dta", replace


